****************************************************
/* 		0.2 Program setup		 */
****************************************************

*Lee Bounds 

capture program drop lb_malawi
program define lb_malawi, rclass

*Usage: lb_malawi outcome, select(indicator) [controls(varlist)]
*  outcome    one outcome variable
*  select     one indicator variable; outcome regressions use observations
*             with indicator==1 (presumably "outcome observed" -- confirm)
*  controls   optional additional regressors for the outcome regressions
*Expects merit, relative, control, y1_standard and y1_rand in the data in memory.
*Returns r(main_*), r(lower_*), r(upper_*) with suffixes merit, relative and N.
*Side effect: (re)defines the named scalars p_merit and p_relative.
*varname instead of varlist: the body treats both as a single variable, so a
*second variable would otherwise silently end up as an extra regressor/sort key.
syntax varname, select(varname) [controls(varlist)]

tempvar rand neg_rank rank n pcrank neg_pcrank 

*get regression-adjusted attrition
reg `select' merit relative i.y1_standard

*calculate trimming proportions
*(summarize leaves e() untouched, so _b[] still refers to the regression above)
sum `select' if control==1
scalar p_merit=_b[merit]/(r(mean)+_b[merit])
scalar p_relative= _b[relative]/(r(mean)+_b[relative])

*random number to break ties
gen `rand'=uniform()

*rank the outcome within cells of `select' x y1_rand (ascending, ties broken by `rand')
bysort `select' y1_rand (`varlist' `rand'): gen `rank'=_n
bysort `select' y1_rand: egen `n'=count(`varlist')

*reverse rank: the largest outcome in a cell gets 1
gen `neg_rank'=`n'-(`rank'-1)

*ranks as a share of the cell's non-missing outcome count
gen `pcrank'=`rank'/`n'
gen `neg_pcrank'=`neg_rank'/`n'

*diagnostic display only
corr `rank' `varlist'

*Full Sample
reg `varlist' merit relative `controls' i.y1_standard if `select'==1 
return scalar main_merit=_b[merit]
return scalar main_relative=_b[relative]
return scalar main_N=e(N)

*Lower bound--trim upper p percent of observations from each group
*scalar() forces the scalar to be used even if the data contain a variable
*whose name matches (or abbreviates to) p_merit / p_relative
reg `varlist' merit relative  `controls' i.y1_standard if `select'==1 & ///
	((control==1)|(merit==1 & `neg_pcrank'>scalar(p_merit))| ///
	(relative==1 & `neg_pcrank'>scalar(p_relative)))
return scalar lower_merit=_b[merit]
return scalar lower_relative=_b[relative]
return scalar lower_N=e(N)

*Upper bound--trim lower p percent of observations from each group
reg `varlist' merit relative  `controls' i.y1_standard if `select'==1 & ///
	((control==1)|(merit==1 & `pcrank'>scalar(p_merit))| ///
	(relative==1 & `pcrank'>scalar(p_relative)))
return scalar upper_merit=_b[merit]
return scalar upper_relative=_b[relative]
return scalar upper_N=e(N)

end

********************************************************************************

estimates clear

*setting seed here to ensure stability
set seed 040319

*Set reps
local reps 500

use "$data_path/merged_y1y2.dta", clear
keep if y1_sub==1 & y1_blq==1 

	lab var merit "\textit{Standard}"
	lab var relative "\textit{Relative}"

*Exam rank (f1_pct): Lee bounds with bootstrapped standard errors.
*The stand-alone call is a dry run whose results are overwritten by bootstrap;
*it now uses the same control (b1_pct) as the bootstrapped call. It is kept
*because lb_malawi draws random numbers, so removing it would shift the
*random-number state that bootstrap starts from.
lb_malawi f1_pct, select(f1_e_p) controls(b1_pct)
bootstrap main_merit=r(main_merit) main_relative=r(main_relative) ///
	lower_merit=r(lower_merit) lower_relative=r(lower_relative) ///
	upper_merit=r(upper_merit) upper_relative=r(upper_relative) ///
	main_N=r(main_N) lower_N=r(lower_N) upper_N=r(upper_N), ///
	reps(`reps') cluster(y1_school_class): ///
	lb_malawi f1_pct, select(f1_e_p)  controls(b1_pct)

*r(table) columns follow the order of the bootstrap expression list:
*  1-2 main, 3-4 lower, 5-6 upper (merit, relative); 7-9 main_N, lower_N, upper_N
*Row 1 holds the point estimates and row 2 the bootstrap standard errors.
mat results=r(table)
local i 0
foreach est in main lower upper {
	local c1 = 2*`i'+1
	local c2 = `c1'+1
	local cN = 7+`i'
	mat b=results[1,`c1'..`c2']
	mat colnames b = merit relative
	mat se=results[2,`c1'..`c2']
	mat colnames se = merit relative
	ereturn post b
	estadd matrix se
	*N of this specification (was column 7, the main-sample N, for all three)
	scalar N=results[1,`cN']
	estadd scalar N
	eststo pct_`est'
	local ++i
}


*Normalised exam score (f1_z_total): Lee bounds with bootstrapped standard errors.
*The stand-alone call is a dry run; its results are overwritten by bootstrap.
lb_malawi f1_z_total, select(f1_e_p) controls(b1_z_total)
bootstrap main_merit=r(main_merit) main_relative=r(main_relative) ///
	lower_merit=r(lower_merit) lower_relative=r(lower_relative) ///
	upper_merit=r(upper_merit) upper_relative=r(upper_relative) ///
	main_N=r(main_N) lower_N=r(lower_N) upper_N=r(upper_N), ///
	reps(`reps') cluster(y1_school_class): ///
	lb_malawi f1_z_total, select(f1_e_p)  controls(b1_z_total)

*r(table) columns follow the order of the bootstrap expression list:
*  1-2 main, 3-4 lower, 5-6 upper (merit, relative); 7-9 main_N, lower_N, upper_N
*Row 1 holds the point estimates and row 2 the bootstrap standard errors.
mat results=r(table)
local i 0
foreach est in main lower upper {
	local c1 = 2*`i'+1
	local c2 = `c1'+1
	local cN = 7+`i'
	mat b=results[1,`c1'..`c2']
	mat colnames b = merit relative
	mat se=results[2,`c1'..`c2']
	mat colnames se = merit relative
	ereturn post b
	estadd matrix se
	*N of this specification (was column 7, the main-sample N, for all three)
	scalar N=results[1,`cN']
	estadd scalar N
	eststo norm_`est'
	local ++i
}

*Export the six stored Lee-bounds estimates to leebounds.tex.
*label, caption and notes are passed to head_foot, which is defined outside
*this section; presumably it fills the $header and $footer globals used by
*esttab below -- confirm against its definition.
*NOTE(review): the notes text hard-codes "500 bootstrap samples"; keep it in
*sync with the reps local used by the bootstrap calls.
local label leebounds
local caption "Lee (2009) Bounds of Main Test Score Estimates"
local notes \item Notes: Lower (upper) bounds are computed by trimming the highest (lowest) ///
	observations in the scholarship treatment groups. The fraction of trimmed observations ///
	equals the relative difference in attrition, computed from Column 4 of Table \ref{tab:attrition}. ///
	Standard errors are in parentheses and are constructed using 500 bootstrap samples, ///
	where classes are sampled to account for clustering. ///
 	All specifications include grade fixed effects and the baseline value of the outcome ///
	variable. * denotes significance at 0.10; ** at 0.05; and *** at 0.01. 

head_foot, caption(`caption') label(`label') notes(`notes') size(small) columns(6)

*One column per stored estimate; the begin() text adds a header row spanning
*columns 2-4 ("Exam Rank") and 5-7 ("Exam Score (Norm)").
esttab using "$tex_path//leebounds.tex", ///
	replace keep(merit relative) cells(b(fmt(3) star) se(par fmt(3)))  star(* 0.10 ** 0.05 *** 0.01)  ///
	mgroups("Main" "Lower Bound" "Upper Bound" "Main" "Lower Bound" "Upper Bound", pattern(1 1 1 1 1 1) ///
		span erepeat(\cmidrule(lr){@span}) ///
		begin("\rule{0pt}{3ex}\rule{0pt}{2ex}&\multicolumn{3}{c}{Exam Rank}&\multicolumn{3}{c}{Exam Score (Norm)} \\ \cmidrule(r){2-4}\cmidrule(r){5-7}\rule{0pt}{3ex}")) ///
	stats(N, fmt(0) labels("N"))  ///
	label varlabels(, end("" [1ex]) nolast) mlabels(none) collabels(none) nonotes addnotes()  ///
	prehead("$header") postfoot("$footer")

*Drop the stored estimates so the next table starts from an empty list
estimates clear

*Attrition interactions with baseline score

*Pieces shared by every regression of f1_e_p below
local grade_fe i.y1_standard
local cl_opt cluster(y1_school_class)

*Baseline measure #1: the continuous score b1_z_total and its products with
*merit and relative
generate base = b1_z_total
label variable base "Baseline"
generate merit_base = merit*b1_z_total
label variable merit_base "Baseline $ \times$ \textit{Standard}"
generate relative_base = relative*b1_z_total
label variable relative_base "Baseline $ \times$ \textit{Relative}"

*Columns 1-3: treatment only, baseline only, fully interacted
eststo: regress f1_e_p merit relative `grade_fe', `cl_opt'
eststo: regress f1_e_p base `grade_fe', `cl_opt'
eststo: regress f1_e_p merit relative base merit_base relative_base `grade_fe', `cl_opt'

*Baseline measure #2: overwrite the same three variables with b1_top15 and
*its existing interaction variables, so the labels and coefficient rows are reused
replace base = b1_top15
replace merit_base = m_b1top15
replace relative_base = r_b1top15

*Columns 4-5: baseline only, fully interacted
eststo: regress f1_e_p base `grade_fe', `cl_opt'
eststo: regress f1_e_p merit relative base merit_base relative_base `grade_fe', `cl_opt'

*On-screen preview of the five stored models
esttab, drop(*standard _cons) se

*Export the stored attrition regressions to attrit_interact.tex.
*label, caption and notes are passed to head_foot, which is defined outside
*this section; presumably it fills the $header and $footer globals used by
*esttab below -- confirm against its definition.
local label attrit-interact
local caption "Attrition on First Follow-up Exam by Scholarship Treatment and Baseline Test Score"


local notes \item Notes: Each column regresses attrition on the first follow-up exam on the variables indicated. ///
			Standard errors, in parentheses, are clustered at the school-grade level. ///
			All specifications include grade fixed effects. ///
			Columns 2 and 3 use the continuous measure of the baseline test score, ///
			 while Columns 4 and 5 use a dummy indicating whether the student was in the top 15 percent ///
			 at baseline. * denotes significance at 0.10; ** at 0.05; and *** at 0.01.

head_foot, notes(`notes') caption(`caption') label(`label') size(small) columns(5)

*pattern(1 1 0 1 0) starts a column group at models 1, 2 and 4: an unlabelled
*first column, then two-column groups for each baseline measure.
esttab using "$tex_path//attrit_interact.tex", ///
	replace drop(*standard _cons) ///
	cells(b(fmt(3) star) se(par fmt(3)))  star(* 0.10 ** 0.05 *** 0.01)  ///
	mgroups("" " Baseline Score" " Top 15 Percent", pattern(1 1 0 1 0) ///
		span prefix(\multicolumn{@span}{c}{\centering) suffix(}) ///
		erepeat(\cmidrule(lr){@span}) ///
		begin("\rule{0pt}{3ex}\rule{0pt}{2ex} & & \multicolumn{4}{c}{Baseline Variable} \\ \cmidrule(r){3-6}\rule{0pt}{3ex}")) ///
	stats(N, fmt(0) labels("N"))  ///
	label varlabels(, end("" [1ex]) nolast) mlabels(none) collabels(none) nonotes addnotes()  ///
	prehead("$header") postfoot("$footer")

*Drop the stored estimates now that the table has been written
estimates clear
